load requirements

# load libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-4
library(ggpubr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.1.8
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(ggthemes)

load data

# load data
df_meta_ACC <- read.csv("../Tables/metadata_77_ACC.csv")

# load microbial data from Poore's study
df_ACC_Voom_SNM <- read.csv("../Tables/Voom-SNM-ACC.csv", row.names = 1)
df_ACC_filter_likely_Voom_SNM  <- read.csv("../Tables/Voom-SNM-Filter-Likely-ACC.csv", row.names = 1)
df_ACC_filter_Plate_Center_Voom_SNM <- read.csv("../Tables/Voom-SNM-Filter-Plate_Center-ACC.csv", row.names = 1)
df_ACC_filter_putative_Voom_SNM <- read.csv("../Tables/Voom-SNM-Filter-Putative-ACC.csv", row.names = 1)
df_ACC_filter_stringent_Voom_SNM <- read.csv("../Tables/Voom-SNM-Filter-Stringent-ACC.csv", row.names = 1)

# Virus
df_ACC_Voom_SNM_Vir <- df_ACC_Voom_SNM %>% select(grep("Virus", names(df_ACC_Voom_SNM)))
df_ACC_filter_likely_Voom_SNM_Vir <- df_ACC_filter_likely_Voom_SNM %>% select(grep("Virus", names(df_ACC_filter_likely_Voom_SNM)))
df_ACC_filter_Plate_Center_Voom_SNM_Vir <- df_ACC_filter_Plate_Center_Voom_SNM %>% select(grep("Virus", names(df_ACC_filter_Plate_Center_Voom_SNM)))
df_ACC_filter_putative_Voom_SNM_Vir <- df_ACC_filter_putative_Voom_SNM %>% select(grep("Virus", names(df_ACC_filter_putative_Voom_SNM)))
df_ACC_filter_stringent_Voom_SNM_Vir <- df_ACC_filter_stringent_Voom_SNM %>% select(grep("Virus", names(df_ACC_filter_stringent_Voom_SNM)))

# Archaea
df_ACC_Voom_SNM_Arc <- df_ACC_Voom_SNM %>% select(grep("Archaea", names(df_ACC_Voom_SNM)))
df_ACC_filter_likely_Voom_SNM_Arc <- df_ACC_filter_likely_Voom_SNM %>% select(grep("Archaea", names(df_ACC_filter_likely_Voom_SNM)))
df_ACC_filter_Plate_Center_Voom_SNM_Arc <- df_ACC_filter_Plate_Center_Voom_SNM %>% select(grep("Archaea", names(df_ACC_filter_Plate_Center_Voom_SNM)))
df_ACC_filter_putative_Voom_SNM_Arc <- df_ACC_filter_putative_Voom_SNM %>% select(grep("Archaea", names(df_ACC_filter_putative_Voom_SNM)))
df_ACC_filter_stringent_Voom_SNM_Arc <- df_ACC_filter_stringent_Voom_SNM %>% select(grep("Archaea", names(df_ACC_filter_stringent_Voom_SNM)))

# Bacteria
df_ACC_Voom_SNM_Bac <- df_ACC_Voom_SNM %>% select(grep("Bacteria", names(df_ACC_Voom_SNM)))
df_ACC_filter_likely_Voom_SNM_Bac <- df_ACC_filter_likely_Voom_SNM %>% select(grep("Bacteria", names(df_ACC_filter_likely_Voom_SNM)))
df_ACC_filter_Plate_Center_Voom_SNM_Bac <- df_ACC_filter_Plate_Center_Voom_SNM %>% select(grep("Bacteria", names(df_ACC_filter_Plate_Center_Voom_SNM)))
df_ACC_filter_putative_Voom_SNM_Bac <- df_ACC_filter_putative_Voom_SNM %>% select(grep("Bacteria", names(df_ACC_filter_putative_Voom_SNM)))
df_ACC_filter_stringent_Voom_SNM_Bac <- df_ACC_filter_stringent_Voom_SNM %>% select(grep("Bacteria", names(df_ACC_filter_stringent_Voom_SNM)))

# load clustering results
df_cluster <- read.csv("../Tables/Unspervised_clustering.csv")

examine the beta-diversity between different two clusters from different methods

function_PcoA <- function(in_matrix, cluster_col, title){
  in_matrix[in_matrix<0] <- 0 
  distance <- vegdist(in_matrix, method = 'bray')
  pcoa <- cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE)
  point <- data.frame(pcoa$point)
  pcoa_eig <- (pcoa$eig)[1:2] / sum(pcoa$eig)
  # extract first two coordinate value
  sample_eig <- data.frame({pcoa$point})[1:2]
  sample_eig$id <- rownames(sample_eig)
  names(sample_eig)[1:2] <- c('PCoA1', 'PCoA2')
  group_m <- merge(sample_eig, df_cluster[,c("id", cluster_col)], by = "id", all.x = TRUE)
  # PERMANOVA
  dt <- in_matrix[group_m$id,]
  print(identical(rownames(dt), group_m$id))
  
  group_m[,cluster_col] <- as.factor(group_m[,cluster_col])
  adonis_result <- adonis2(as.formula(paste("dt~", cluster_col, sep = "")), group_m, permutations = 999, distance = 'bray')
  
  R2 <- round(adonis_result$R2[1], 2)
  pvalue <- round(adonis_result$`Pr(>F)`[1], 2)
  plot <- ggscatter(group_m, x= "PCoA1", y = "PCoA2",color=cluster_col,
                    ellipse = TRUE,
                    mean.point = TRUE, star.plot = TRUE,
                    ellipse.level = 0.95,
                    ggtheme = theme_minimal()) +
    labs(x = paste('PCoA1: ', round(100 * pcoa_eig[1], 2), '%'),
         y = paste('PCoA2: ', round(100 * pcoa_eig[2], 2), '%'))+
    theme_classic()+
    scale_color_manual(values = c("#0072B5CC","#E18727CC"))+
    geom_vline(xintercept = 0, color = 'gray', size = 0.4) + 
    geom_hline(yintercept = 0, color = 'gray', size = 0.4) +
    theme(panel.grid = element_line(color = 'black', linetype = 2, size = 0.1), 
          panel.background = element_rect(color = 'black', fill = 'transparent'), 
          legend.title=element_blank())+
    theme(axis.title = element_text(size = 18, colour="black"),
          axis.text = element_text(size = 16, colour = "black"),
          legend.text = element_text(size = 16))+
    geom_text(x=0, y=max(group_m$PCoA2)*0.8, label=paste("PERMANOVA R2 = ",R2 ,"\n","P = ", pvalue, sep = ""))+
    ggtitle(title)
  return(plot)
}

# examine the beta-diversity
for (x in c("Without_combined", "Likely_combined", "PC_combined", "Putative_combined", "Strigent_combined")){
  col_cluster = x
  p_nor <- function_PcoA(df_ACC_Voom_SNM, col_cluster, "Without")
  p_nor_f_likely <- function_PcoA(df_ACC_filter_likely_Voom_SNM, col_cluster, "F_Likely")
  p_nor_f_PC <- function_PcoA(df_ACC_filter_Plate_Center_Voom_SNM, col_cluster, "F_PC")
  p_nor_f_putative <- function_PcoA(df_ACC_filter_putative_Voom_SNM, col_cluster, "F_Putative")
  p_nor_f_strigent <- function_PcoA(df_ACC_filter_stringent_Voom_SNM, col_cluster, "F_Strigent")
  
  p_nor_Vir <- function_PcoA(df_ACC_Voom_SNM_Vir, col_cluster, "Without_Vir")
  p_nor_f_likely_Vir <- function_PcoA(df_ACC_filter_likely_Voom_SNM_Vir, col_cluster, "F_Likely_Vir")
  p_nor_f_PC_Vir <- function_PcoA(df_ACC_filter_Plate_Center_Voom_SNM_Vir, col_cluster, "F_PC_Vir")
  p_nor_f_putative_Vir <- function_PcoA(df_ACC_filter_putative_Voom_SNM_Vir, col_cluster, "F_Putative_Vir")
  p_nor_f_strigent_Vir <- function_PcoA(df_ACC_filter_stringent_Voom_SNM_Vir, col_cluster, "F_Strigent_Vir")
  
  p_nor_Arc <- function_PcoA(df_ACC_Voom_SNM_Arc, col_cluster, "Without_Arc")
  p_nor_f_likely_Arc <- function_PcoA(df_ACC_filter_likely_Voom_SNM_Arc, col_cluster, "F_Likely_Arc")
  p_nor_f_PC_Arc <- function_PcoA(df_ACC_filter_Plate_Center_Voom_SNM_Arc, col_cluster, "F_PC_Arc")
  p_nor_f_putative_Arc <- function_PcoA(df_ACC_filter_putative_Voom_SNM_Arc, col_cluster, "F_Putative_Arc")
  p_nor_f_strigent_Arc <- function_PcoA(df_ACC_filter_stringent_Voom_SNM_Arc, col_cluster, "F_Strigent_Arc")
  
  p_nor_Bac <- function_PcoA(df_ACC_Voom_SNM_Bac, col_cluster, "Without_Bac")
  p_nor_f_likely_Bac <- function_PcoA(df_ACC_filter_likely_Voom_SNM_Bac, col_cluster, "F_Likely_Bac")
  p_nor_f_PC_Bac <- function_PcoA(df_ACC_filter_Plate_Center_Voom_SNM_Bac, col_cluster, "F_PC_Bac")
  p_nor_f_putative_Bac <- function_PcoA(df_ACC_filter_putative_Voom_SNM_Bac, col_cluster, "F_Putative_Bac")
  p_nor_f_strigent_Bac <- function_PcoA(df_ACC_filter_stringent_Voom_SNM_Bac, col_cluster, "F_Strigent_Bac")
  
  # pdf(file = paste("Figures/ACC_beta_diversity_TWO_Cluster_",col_cluster,".pdf", sep = ""), width = 6, height = 5)
  print(p_nor)
  print(p_nor_f_likely)
  print(p_nor_f_PC)
  print(p_nor_f_putative)
  print(p_nor_f_strigent)
  print(p_nor_Vir)
  print(p_nor_f_likely_Vir)
  print(p_nor_f_PC_Vir)
  print(p_nor_f_putative_Vir)
  print(p_nor_f_strigent_Vir)
  print(p_nor_Arc)
  print(p_nor_f_likely_Arc)
  print(p_nor_f_PC_Arc)
  print(p_nor_f_putative_Arc)
  print(p_nor_f_strigent_Arc)
  print(p_nor_Bac)
  print(p_nor_f_likely_Bac)
  print(p_nor_f_PC_Bac)
  print(p_nor_f_putative_Bac)
  print(p_nor_f_strigent_Bac)
  # dev.off()
}
## [1] TRUE
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 50
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 46
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 51
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 48
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 41
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 43
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 42
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 29
## of the first 76 eigenvalues are > 0
## [1] TRUE
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 73
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0
## [1] TRUE

## [1] TRUE
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 50
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 46
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 51
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 48
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 41
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 43
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 42
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 29
## of the first 76 eigenvalues are > 0
## [1] TRUE
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 73
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0

## [1] TRUE

## [1] TRUE
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 50
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 46
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 51
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 48
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 41
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 43
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 42
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 29
## of the first 76 eigenvalues are > 0
## [1] TRUE
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 73
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0

## [1] TRUE

## [1] TRUE
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 50
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 46
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 51
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 48
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 41
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 43
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 42
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 29
## of the first 76 eigenvalues are > 0
## [1] TRUE
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 73
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0

## [1] TRUE

## [1] TRUE
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 50
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 46
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 51
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 48
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 41
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 43
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 42
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 39
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 29
## of the first 76 eigenvalues are > 0
## [1] TRUE
## [1] TRUE
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 75
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 73
## of the first 76 eigenvalues are > 0
## [1] TRUE
## Warning in cmdscale(distance, k = (nrow(in_matrix) - 1), eig = TRUE): only 45
## of the first 76 eigenvalues are > 0

## [1] TRUE

examine the alpha-diversity between different two clusters from different methods

df_alpha <- read.csv("../Tables/Alpha_diversity_ACC.csv")

for (col_cluster in c("Without_combined", "Likely_combined","PC_combined", "Putative_combined", "Strigent_combined")){
  #col_cluster = "Putative_combined"
  df_alpha_cluster <- merge(df_alpha, df_cluster[,c("id", col_cluster)], by="id", all.x = TRUE)
  df_alpha_cluster[,col_cluster][df_alpha_cluster[,col_cluster]==1] <- "MS1"
  df_alpha_cluster[,col_cluster][df_alpha_cluster[,col_cluster]==2] <- "MS2"
  
  # pdf(file = paste("Figures/ACC_Alpha_diversity_TWO_Cluster_",col_cluster,".pdf",sep = ""), width = 3, height = 5)
  for (i in 2:37){
    df_select <- df_alpha_cluster[,c(i, 38)]
    p <- df_select %>%
      ggplot(aes(x=df_select[,2], y=df_select[,1], fill=df_select[,2])) +
      geom_boxplot()+
      theme_base()+
      stat_compare_means(method = "wilcox.test")+
      xlab("") +
      ylab("Shannon Index")+
      scale_fill_manual(values = c("#0072B5CC","#E18727CC"))+
      theme(legend.position = "none")+
      ggtitle(names(df_alpha_cluster)[i])
    print(p)
  }
  # dev.off()
}

examine the confounders impacting microbial community

df_clinical <- read.csv("../00.Data/ACC_clinic.csv")
confounders <- names(df_clinical)[names(df_clinical)!="id"]
function_adonis2 <- function(in_matrix, identifier){
  df_all <- data.frame()
  dt <- in_matrix[df_clinical$id,]
  dt[dt<0] <- 0
  print(identical(rownames(dt), df_clinical$id))
  set.seed(123)
  for (confounder in confounders){
    adonis_result <- adonis2(as.formula(paste("dt~",confounder, sep = "")),
                             df_clinical, permutations = 999, distance = 'bray', na.action = na.omit)
    df_all <- rbind(df_all, as.data.frame(adonis_result)[1,])
  }
  df_all$adj.P <- p.adjust(df_all[,"Pr(>F)"], method="BH")
  df_all$identifier <- identifier
  return(df_all)
}


df_nor <- function_adonis2(df_ACC_Voom_SNM, "Overall")
## [1] TRUE
df_nor_f_likely <- function_adonis2(df_ACC_filter_likely_Voom_SNM, "Overall_f_likely")
## [1] TRUE
df_nor_f_PC <- function_adonis2(df_ACC_filter_Plate_Center_Voom_SNM, "Overall_f_PC")
## [1] TRUE
df_nor_f_putative <- function_adonis2(df_ACC_filter_putative_Voom_SNM, "Overall_f_putative")
## [1] TRUE
df_nor_f_strigent <- function_adonis2(df_ACC_filter_stringent_Voom_SNM, "Overall_f_strigent")
## [1] TRUE
df_nor_Vir <- function_adonis2(df_ACC_Voom_SNM_Vir, "Virus")
## [1] TRUE
df_nor_f_likelyr_Vir <- function_adonis2(df_ACC_filter_likely_Voom_SNM_Vir, "Virus_f_likely")
## [1] TRUE
df_nor_f_PCr_Vir <- function_adonis2(df_ACC_filter_Plate_Center_Voom_SNM_Vir, "Virus_f_PC")
## [1] TRUE
df_nor_f_putativer_Vir <- function_adonis2(df_ACC_filter_putative_Voom_SNM_Vir, "Virus_f_putative")
## [1] TRUE
df_nor_Arc <- function_adonis2(df_ACC_Voom_SNM_Arc, "Archaea")
## [1] TRUE
df_nor_f_likelyr_Arc <- function_adonis2(df_ACC_filter_likely_Voom_SNM_Arc, "Archaea_f_likely")
## [1] TRUE
df_nor_f_PCr_Arc <- function_adonis2(df_ACC_filter_Plate_Center_Voom_SNM_Arc, "Archaea_f_PC")
## [1] TRUE
df_nor_f_putativer_Arc <- function_adonis2(df_ACC_filter_putative_Voom_SNM_Arc, "Archaea_f_putative")
## [1] TRUE
df_nor_Bac <- function_adonis2(df_ACC_Voom_SNM_Bac, "Bacteria")
## [1] TRUE
df_nor_f_likelyr_Bac <- function_adonis2(df_ACC_filter_likely_Voom_SNM_Bac, "Bacteria_f_likely")
## [1] TRUE
df_nor_f_PCr_Bac <- function_adonis2(df_ACC_filter_Plate_Center_Voom_SNM_Bac, "Bacteria_f_PC")
## [1] TRUE
df_nor_f_putativer_Bac <- function_adonis2(df_ACC_filter_putative_Voom_SNM_Bac, "Bacteria_f_putative")
## [1] TRUE
df_nor_f_strigent_Bac <- function_adonis2(df_ACC_filter_stringent_Voom_SNM_Bac, "Bacteria_f_strigent")
## [1] TRUE
df_all_adonis <- rbind(df_nor, df_nor_f_likely, df_nor_f_PC, df_nor_f_putative, df_nor_f_strigent,
                       df_nor_Vir, df_nor_f_likelyr_Vir, df_nor_f_PCr_Vir, df_nor_f_putativer_Vir,
                       df_nor_Arc, df_nor_f_likelyr_Arc, df_nor_f_PCr_Arc, df_nor_f_putativer_Arc,
                       df_nor_Bac, df_nor_f_likelyr_Bac, df_nor_f_PCr_Bac, df_nor_f_putativer_Bac, df_nor_f_strigent_Bac)

write.csv(df_all_adonis, file="../Tables/Confounders_Impacting_Microbime.csv", row.names = TRUE)